home *** CD-ROM | disk | FTP | other *** search
/ ftp.cs.arizona.edu / ftp.cs.arizona.edu.tar / ftp.cs.arizona.edu / icon / newsgrp / group98b.txt / 000158_icon-group-sender _Tue Aug 25 08:21:48 1998.msg < prev    next >
Internet Message Format  |  2000-09-20  |  5KB

  1. Return-Path: <icon-group-sender>
  2. Received: from kingfisher.CS.Arizona.EDU (kingfisher.CS.Arizona.EDU [192.12.69.239])
  3.     by baskerville.CS.Arizona.EDU (8.9.1a/8.9.1) with SMTP id IAA16983
  4.     for <icon-group-addresses@baskerville.CS.Arizona.EDU>; Tue, 25 Aug 1998 08:21:48 -0700 (MST)
  5. Received: by kingfisher.CS.Arizona.EDU (5.65v4.0/1.1.8.2/08Nov94-0446PM)
  6.     id AA15430; Tue, 25 Aug 1998 08:21:24 -0700
  7. Message-Id: <35E20EF3.6DAEA82A@ix.netcom.com>
  8. Date: Mon, 24 Aug 1998 21:10:12 -0400
  9. From: Phillip Lee Thomas <teruthom@ix.netcom.com>
  10. Reply-To: thomaspl@acm.org
  11. X-Mailer: Mozilla 4.05 [en] (Win95; U)
  12. Mime-Version: 1.0
  13. To: "Dr. Louis A. Turk" <laturk@ibm.net>, icon-group@optima.CS.Arizona.EDU
  14. Subject: Re: Why doen't this work?
  15. References: <2.2.32.19980822053809.003390d8@pop5.ibm.net>
  16. Content-Type: text/plain; charset=us-ascii
  17. Content-Transfer-Encoding: 7bit
  18. Content-Transfer-Encoding: 7bit
  19. Content-Transfer-Encoding: 7bit
  20. Errors-To: icon-group-errors@optima.CS.Arizona.EDU
  21. Content-Transfer-Encoding: 7bit
  22. Status: RO
  23. Content-Length: 3736
  24.  
  25. I'd have to see how this works against a piece of text, but you have a problem in the first
  26. few lines if a single line has the shape:   <P>some text</P>   You write this line out and
  27. then keep reading further lines until you find one containing </P>. Looking at this rather
  28. briefly, I would think you would produce a number of lines glued together as "<P>some
  29. text</P> some more text and some more and <P> something that meets condition 2</P>".
  30.  
  31. Secondly, HTML glues the whole thing together for parsing purposes so it is quite possible
  32. that in a single input line you could get several "<P>...</P>" sequences.
  33.  
  34. I suggest that you read the whole document in with a single reads() if this doesn't blow
  35. your memory. Then use map(megaline,"\n"," ") to convert newlines to spaces, and finally do a
  36. string scan along the line, chopping off pieces as you go:
  37.     megaline ? {
  38.         while line  :=tab(find("<P>")) do  {
  39.              line ||:=  tab(find("</P>") +4)
  40.              write(out, line)
  41.              }  # while in <P>...</P>
  42.       }  # while scan
  43.  
  44. You'd have to fiddle a bit with this but it's close to being right.
  45.  
  46. Sound possible?
  47.  
  48. Phillip Thomas
  49.  
  50. Dr. Louis A. Turk wrote:
  51.  
  52. > Can anybody tell me why this code only removes CR/LF's every other paragraph
  53. > that contains them? Why
  54. > does it skip a paragraph?
  55. >
  56. > Louis
  57. >
  58. > Obviously, there will be more to this program, once I get past this problem.
  59. >
  60. > ############################################################################
  61. > #############
  62. > #
  63. > #  HTML TO Nota Bene 4.5 FILTER
  64. > #  Ver. 1.0 Aug.
  65. > #  Programmer: Louis A. Turk
  66. > #
  67. > #  USE: Converts HTML to Nota Bene using two passes.  FIRST PASS:
  68. > #       1. Removes the CR/LF's between <P> and </P>
  69. > #       2. Removes the CR/LF's between <UL> and </UL> and also removes right
  70. > indention.
  71. > #       SECOND PASS:
  72. > #       3. Replaces all HTML code with Nota Bene code.
  73. > #
  74. > ############################################################################
  75. > ##############
  76. >
  77. > link graphics
  78. >
  79. > procedure main(arg)
  80. >
  81. >         WOpen("size=1005,850")
  82. >
  83. >         infile    := arg[1]
  84. >         outfile   := arg[2]
  85. >         tempfile  := "temp3.txt"
  86. >
  87. >         in  := open(infile,"r") | stop("Can't open file: ",in)
  88. >         out := open(outfile,"w") | stop("Can't open file: ",out)
  89. >         tmp := open(tempfile,"c") | stop("Can't open file: ",tmp)
  90. >
  91. >         #### FIRST PASS: REMOVE EXCESS CR/LF's
  92. > ######################################
  93. >
  94. >         while line := read(in) do {
  95. >                 if find(line,"<P>") then {                            # Beginning of
  96. > defective code
  97. >                         WWrites(line," ")
  98. >                         writes(tmp,line," ")
  99. >                         until find(line := read(in),"</P>") do {
  100. >                                 WWrites(line," ")
  101. >                                 writes(tmp,line," ")
  102. >                         }
  103. >                         WWrite(line)
  104. >                         write(tmp,line)
  105. >                 }
  106. >                 else if find(line,"<UL>") then {
  107. >                         WWrites(line," ")
  108. >                         writes(tmp,line," ")
  109. >                         until find(line := read(in),"</UL>") do {
  110. >                                 WWrites(line," ")
  111. >                                 writes(tmp,line," ")
  112. >                         }
  113. >                         WWrite(line)
  114. >                         write(tmp,line)
  115. >                 }                                                     # End of defective
  116. > code
  117. >                 else {
  118. >                         WWrite(line)
  119. >                         write(tmp,line)
  120. >                 }
  121. >
  122. >         }
  123. >         ##### SECOND PASS: #######################################
  124. > Event()
  125. > end
  126.  
  127.  
  128.  
  129.